#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Midterm Projection (Years 1-5) using CESM-DPLE precip, temp with KNN block bootstraping of observed flows #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Version 1.1
# 18 June 2020
#
# ~~~~ Updates ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
# Functionalized redundant code
# Weighted Euclidean distance calculation with correlation between covariate and flow
# Using ensemble mean for each LE covariate, instead of quantiles
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
#### Author: David Woodson
#### CLEAR MEMORY
rm(list=ls())
# #packages
# library(dplyr)
# #library(tidyr)
# library(lubridate)
# library(ggplot2)
# library(data.table)
# library(ggrepel)
# library(RColorBrewer)
# library(readxl)
# library(hydroGOF)
# library(kknn)
# library(plyr)
# library(qpcR)
# library(easyVerification)
# library(verification)
# library(reshape)
# library(stringr)
# library(ggpubr)
# library(randomForest)
# library(tidytext)
# library(DescTools)
# library(viridis)
# library(scatterplot3d)
#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="K:/My Drive/Phd Research/CRB Midterm Temperature Perturbed Predictions/Code/Phase 1/years1-10"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1_orig.R"))
## Warning: package 'leaps' was built under R version 3.6.3
## Warning: package 'KernSmooth' was built under R version 3.6.3
## Warning: package 'akima' was built under R version 3.6.3
## Warning: package 'fields' was built under R version 3.6.3
## Warning: package 'spam' was built under R version 3.6.3
## Warning: package 'mgcv' was built under R version 3.6.3
## Warning: package 'nlme' was built under R version 3.6.3
## Warning: package 'locfit' was built under R version 3.6.3
## Warning: package 'MASS' was built under R version 3.6.3
## Warning: package 'prodlim' was built under R version 3.6.3
## Warning: package 'rgdal' was built under R version 3.6.3
## Warning: package 'sp' was built under R version 3.6.3
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'reshape2' was built under R version 3.6.3
## Warning: package 'scales' was built under R version 3.6.3
source("Lib_HW1_orig.R")
suppressPackageStartupMessages(source("Lib_HW2.R"))
## Warning: package 'VGAM' was built under R version 3.6.3
## Warning: package 'nnet' was built under R version 3.6.3
## Warning: package 'boot' was built under R version 3.6.3
## Warning: package 'dtw' was built under R version 3.6.3
## Warning: package 'proxy' was built under R version 3.6.3
## Warning: package 'spBayes' was built under R version 3.6.3
## Warning: package 'geoR' was built under R version 3.6.3
## Warning: package 'mclust' was built under R version 3.6.3
## Warning: package 'kohonen' was built under R version 3.6.3
source("Lib_HW2.R")
#### Clear the R console
cat("\f")
lib = "K:/My Drive/Phd Research/CRB Midterm Temperature Perturbed Predictions/Code/Phase 1/midTerm_forecasts/midTermForecast_Library_v1.4.R"
suppressPackageStartupMessages(source(lib))
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'lubridate' was built under R version 3.6.3
## Warning: package 'data.table' was built under R version 3.6.3
## Warning: package 'ggrepel' was built under R version 3.6.3
## Warning: package 'hydroGOF' was built under R version 3.6.3
## Warning: package 'zoo' was built under R version 3.6.3
## Warning: package 'plyr' was built under R version 3.6.3
## Warning: package 'qpcR' was built under R version 3.6.2
## Warning: package 'minpack.lm' was built under R version 3.6.2
## Warning: package 'rgl' was built under R version 3.6.3
## Warning: package 'robustbase' was built under R version 3.6.3
## Warning: package 'Matrix' was built under R version 3.6.3
## Warning: package 'easyVerification' was built under R version 3.6.2
## Warning: package 'SpecsVerification' was built under R version 3.6.3
## Warning: package 'ggpubr' was built under R version 3.6.3
## Warning: package 'tidytext' was built under R version 3.6.3
## Warning: package 'DescTools' was built under R version 3.6.3
## Warning: package 'viridis' was built under R version 3.6.3
## Warning: package 'xts' was built under R version 3.6.3
## Warning: package 'ggridges' was built under R version 3.6.3
source(lib)
# meanLengths = c(3,5,7)
meanLengths = 2:10
n = length(meanLengths)
fcsts = vector("list", n)
meanLength = 3
fcstMode = 'kFold'
for(i in meanLengths){
cat("\n")
cat(paste0("Mean length: ", i, "years"))
cat("\n")
fcsts[[i]] = midTermProjection(meanLength = i, fcstMode = 'kFold')
cat("\n")
cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
cat("\n")
}
##
## Mean length: 2years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'






## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'









































##
## CESM-DPLE, KNN Forecast DI = 46.8571428571428
##
## CESM-DPLE, RF Forecast DI = 56.5714285714285
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables



##
## ESP DI = 65.4285714285714







##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 3years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'








































##
## CESM-DPLE, KNN Forecast DI = 79.4117647058823
##
## CESM-DPLE, RF Forecast DI = 68.8235294117647
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables



##
## ESP DI = 60







##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 4years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







































##
## CESM-DPLE, KNN Forecast DI = 56.3636363636364
##
## CESM-DPLE, RF Forecast DI = 80.6060606060606
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables



##
## ESP DI = 61.2121212121211







##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 5years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'






































##
## CESM-DPLE, KNN Forecast DI = 78.75
##
## CESM-DPLE, RF Forecast DI = 68.75
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables



##
## ESP DI = 49.6875







##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 6years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'





































##
## CESM-DPLE, KNN Forecast DI = 75.8064516129032
##
## CESM-DPLE, RF Forecast DI = 85.8064516129033
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables








##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 7years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'




































##
## CESM-DPLE, KNN Forecast DI = 77.3333333333334
##
## CESM-DPLE, RF Forecast DI = 71.9999999999999
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables








##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 8years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'



































##
## CESM-DPLE, KNN Forecast DI = 75.5172413793104
##
## CESM-DPLE, RF Forecast DI = 80.3448275862069
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables








##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 9years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'


































##
## CESM-DPLE, KNN Forecast DI = 82.1428571428571
##
## CESM-DPLE, RF Forecast DI = 98.5714285714286
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables








##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 10years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'







## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'





##
## CESM-DPLE, KNN Forecast DI = 124.074074074074
##
## CESM-DPLE, RF Forecast DI = 128.148148148148
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables









##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstSkillScores = NULL
fcstReliability = NULL
fcstReliabilityScores = NULL
for(i in meanLengths){
cat("\n")
cat(paste0("Mean length: ", i, "years"))
cat("\n")
#skill scores
fcsts[[i]][[2]]$meanLength = i
fcstSkillScores = bind_rows(fcstSkillScores, fcsts[[i]][[2]])
#reliability plot data (probabilities)
fcstReliability = bind_rows(fcstReliability, fcsts[[i]][[5]])
#reliability scores
fcstReliabilityScores = bind_rows(fcstReliabilityScores, fcsts[[i]][[6]])
cat("\n")
cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
cat("\n")
}
##
## Mean length: 2years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 3years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 4years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 5years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 6years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 7years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 8years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 9years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 10years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstSkillScores$meanLength = factor(fcstSkillScores$meanLength)
fcstSkillScores$variable = as.character(fcstSkillScores$variable)
fcstSkillScores$plotLabel = paste0("Mean Length (N) = ", fcstSkillScores$meanLength, "-years")
# fcstReliability$meanLength = factor(fcstReliability$meanLength)
fcstReliability$plotLabel = paste0("Mean Length (N) = ", fcstReliability$meanLength, "-years")
save forecasts and skillscores
saveRDS(fcsts, "RF_forecasts_v2.07_blindKFold.Rds")
# test = readRDS("RF_forecasts_v2.07_blindKFold.Rds")
saveRDS(fcstSkillScores, "RF_forecasts_skillscores_v2.07_blindKFold.Rds")
read forecasts and skillscores
fcsts = readRDS("RF_forecasts_v2.07_blindKFold.Rds")
# test = readRDS("RF_forecasts_v2.07_blindKFold.Rds")
fcstSkillScores = readRDS("RF_forecasts_skillscores_v2.07_blindKFold.Rds")
plot skill scores
plot1 = ggplot(data = subset(fcstSkillScores, variable == "RPSS" & model != "CESM-DPLE, KNN"), aes(x = meanLength, y = value, fill = model)) +
geom_boxplot() + theme_bw() +
#coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
#ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
theme(axis.title = element_text(face = "bold"), text=element_text(size=20)) +
ylab("RPSS")
print(plot1)

ggsave("rpss_flowTerciles_AllMeanLengths_blind_RFonly.png", plot = print(plot1), device = "png", width = 10, height = 5, dpi = 450, units = "in")
plot2 = ggplot(data = subset(fcstSkillScores, variable == "CRPSS" & model != "CESM-DPLE, KNN"), aes(x = meanLength, y = value, fill = model)) +
geom_boxplot() + theme_bw() +
#coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
#ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
theme(axis.title = element_text(face = "bold"), text=element_text(size=20)) +
ylab("CRPSS")
print(plot2)

ggsave("crpss_flowTerciles_AllMeanLengths_blind_RFonly.png", plot = print(plot2), device = "png", width = 10, height = 5, dpi = 450, units = "in")
```r
#save boxplot quartiles
d1 = ggplot_build(plot1)$data
print(d1[[1]])
```
```
## fill ymin lower middle upper ymax
## 1 #F8766D -1.8040665 -0.4389345 -0.054902154 0.6398075 0.9478514
## 2 #00BFC4 -0.9753086 -0.4711742 0.003227384 0.2709304 0.7041867
## 3 #F8766D -1.0427675 -0.1699043 0.408646823 0.6114730 0.9803672
## 4 #00BFC4 -0.8916716 -0.2857795 -0.152333927 0.3663306 0.8190696
## 5 #F8766D -1.5911608 -0.2475288 0.241560091 0.6673449 0.9366981
## 6 #00BFC4 -0.8337592 -0.3116778 -0.150561798 0.2438563 0.7421780
## 7 #F8766D -1.7175887 -0.7868249 0.303927773 0.5527396 0.9715077
## 8 #00BFC4 -1.3769778 -0.6019413 -0.280336238 0.3757278 0.7924998
## 9 #F8766D -1.6476009 -0.3881080 0.328427356 0.6562341 0.9739151
## 10 #F8766D -1.4600757 -0.4008399 0.208940898 0.6822097 0.9638551
## 11 #F8766D -2.3341885 -0.7918139 -0.025508069 0.6390359 0.9999775
## 12 #F8766D -2.1273847 -0.8454972 0.146380523 0.5836548 1.0000000
## 13 #F8766D -3.2214716 -1.7562973 -0.703087347 -0.1081327 1.0000000
## outliers notchupper notchlower x flipped_aes
## 1 0.233196108 -0.34300042 0.8125 FALSE
## 2 0.201420326 -0.19496556 1.1875 FALSE
## 3 -1.752894, -1.974865, -1.448036 0.620374864 0.19691878 1.8125 FALSE
## 4 0.024366882 -0.32903473 2.1875 FALSE
## 5 -1.784775 0.493189445 -0.01006926 2.8125 FALSE
## 6 0.002233805 -0.30335740 3.1875 FALSE
## 7 0.678077747 -0.07022220 3.8125 FALSE
## 8 -0.007266238 -0.55340624 4.1875 FALSE
## 9 0.624786967 0.03206774 5.0000 FALSE
## 10 0.521365195 -0.10348340 6.0000 FALSE
## 11 0.394301334 -0.44531747 7.0000 FALSE
## 12 -3.481195 0.573113790 -0.28035274 8.0000 FALSE
## 13 -0.201928059 -1.20424663 9.0000 FALSE
## PANEL group ymin_final ymax_final xmin xmax weight colour size alpha
## 1 1 1 -1.8040665 0.9478514 0.64375 0.98125 1 grey20 0.5 NA
## 2 1 2 -0.9753086 0.7041867 1.01875 1.35625 1 grey20 0.5 NA
## 3 1 3 -1.9748651 0.9803672 1.64375 1.98125 1 grey20 0.5 NA
## 4 1 4 -0.8916716 0.8190696 2.01875 2.35625 1 grey20 0.5 NA
## 5 1 5 -1.7847748 0.9366981 2.64375 2.98125 1 grey20 0.5 NA
## 6 1 6 -0.8337592 0.7421780 3.01875 3.35625 1 grey20 0.5 NA
## 7 1 7 -1.7175887 0.9715077 3.64375 3.98125 1 grey20 0.5 NA
## 8 1 8 -1.3769778 0.7924998 4.01875 4.35625 1 grey20 0.5 NA
## 9 1 9 -1.6476009 0.9739151 4.66250 5.33750 1 grey20 0.5 NA
## 10 1 10 -1.4600757 0.9638551 5.66250 6.33750 1 grey20 0.5 NA
## 11 1 11 -2.3341885 0.9999775 6.66250 7.33750 1 grey20 0.5 NA
## 12 1 12 -3.4811948 1.0000000 7.66250 8.33750 1 grey20 0.5 NA
## 13 1 13 -3.2214716 1.0000000 8.66250 9.33750 1 grey20 0.5 NA
## shape linetype
## 1 19 solid
## 2 19 solid
## 3 19 solid
## 4 19 solid
## 5 19 solid
## 6 19 solid
## 7 19 solid
## 8 19 solid
## 9 19 solid
## 10 19 solid
## 11 19 solid
## 12 19 solid
## 13 19 solid
```
```r
#write.csv(d5, paste0(path, fcstType, "-", reservoir, "-", meanLength, "yr_SeptemberOnlyRMSEbyEns_boxplotSummary.xslx"), sep = ",")
saveRDS(d1, "rpss_flowTerciles_AllMeanLengths_blind_RFonly_boxplotSummary.rds")
dat1 = d1[[1]]
y1 = (dat1$middle[1] - dat1$middle[2])/dat1$middle[2]
```
plot reliability
plot3 = ggplot(data = subset(fcstReliabilityScores, model!= "CESM-DPLE, KNN"), aes(x = factor(meanLength), y = V1, color = model)) +
geom_point() + theme_bw() +
#coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
#ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
theme(axis.title = element_text(face = "bold")) +
ylab("Reliability Score")
print(plot3)

ggsave("reliabilityScore_AllMeanLengths_blind_RFonly.png", plot = print(plot3), device = "png", dpi = 450, units = "in")
## Saving 7 x 5 in image
fcstRMSEs = NULL
for(i in meanLengths){
cat("\n")
cat(paste0("Mean length: ", i, "years"))
cat("\n")
rmse.i = melt.list(fcsts[[i]][[3]])
rmse.i$meanLength = i
fcstRMSEs = bind_rows(fcstRMSEs, rmse.i)
cat("\n")
cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
cat("\n")
}
##
## Mean length: 2years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 3years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 4years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 5years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 6years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 7years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 8years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 9years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 10years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstRMSEs$meanLength = factor(fcstRMSEs$meanLength)
colnames(fcstRMSEs)[2] = "model"
plot = ggplot(data = subset(fcstRMSEs, model != "CESM-DPLE, KNN"), aes(x = meanLength, y = value, shape = model)) +
geom_point() + theme_bw() +
#coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
#ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
theme(axis.title = element_text(face = "bold")) +
ylab("RMSE (MAF)")
print(plot)

ggsave("RMSEs_AllMeanLengths_blind_RFonly.png", plot = print(plot), device = "png", width = 10, height = 5, dpi = 450, units = "in")
fcstEnsembles = NULL
clim.Nyrs = matrix(NA, nrow = length(meanLengths), ncol = 2)
clim.Nyrs[,1] = meanLengths
i = 2
for(i in meanLengths){
cat("\n")
cat(paste0("Mean length: ", i, "years"))
cat("\n")
fcst.i = fcsts[[i]][[1]]$NyrMeanFlowBootstrap
fcst.i$meanLength = i
fcstEnsembles = bind_rows(fcstEnsembles, fcst.i)
clim.Nyrs[i-1, 2] = as.numeric(fcsts[[i]][[4]])
cat("\n")
cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
cat("\n")
}
##
## Mean length: 2years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 3years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 4years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 5years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 6years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 7years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 8years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 9years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
##
## Mean length: 10years
##
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstEnsembles$meanLength = factor(fcstEnsembles$meanLength)
colnames(clim.Nyrs) = c("meanLength", "clim.Nyr.i")
clim.Nyrs = data.frame(clim.Nyrs)
clim.Nyrs$meanLength = factor(clim.Nyrs$meanLength)
fcstEnsembles$plotLabel = paste0("Mean Length (N) = ", fcstEnsembles$meanLength, "-years")
plot = ggplot(subset(fcstEnsembles, covariate != "CESM-DPLE, KNN"), aes(x = startYear, y = q.f_maf, group = interaction(startYear, covariate), fill = covariate)) +
geom_boxplot() + labs(colour = "Covariates & Model") + theme_bw() +
geom_point(aes(x = startYear, y = obsFlow), color = "red", stroke = 1, pch = 3) +
# geom_crossbar(data = subset(fcstEnsembles, covariate != "CESM-DPLE, KNN"), aes(x = startYear, y = obsFlow, ymin=obsFlow, ymax=obsFlow), position=position_dodge(), color="blue", fatten = 1.5) +
xlab("Year") + ylab("N-year mean flow (MAF)") +
theme(axis.title = element_text(face = "bold"), text=element_text(size=25)) +
geom_hline(data = clim.Nyrs, aes(yintercept = clim.Nyr.i), col = "black", linetype="dashed") +
scale_x_continuous(breaks=seq(1980, 2010, 10)) +
scale_fill_manual(values = "lightblue") +
#scale_linetype_manual(name = "Climatology", values = 1, guide = guide_legend(override.aes = list(color = c("black")))) +
facet_wrap(~meanLength, scales = "free", shrink = F)
print(plot)

ggsave("fcstEnsembles_AllMeanLengths_blind_RFonly.png", plot = print(plot), device = "png", width = 15, height = 10, dpi = 450, units = "in")
plot = ggplot(subset(fcstEnsembles, covariate != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7)), aes(x = startYear, y = q.f_maf, group = interaction(startYear, covariate), fill = covariate)) +
geom_boxplot() + labs(fill = "Covariates & Model") + theme_bw() +
geom_point(aes(x = startYear, y = obsFlow), color = "red", stroke = 2, pch = 3) +
# geom_crossbar(data = subset(fcstEnsembles, covariate != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7)), aes(x = startYear, y = obsFlow, ymin=obsFlow, ymax=obsFlow), position=position_dodge(), color="red", fatten = 2) +
xlab("Year") + ylab("N-year mean flow (MAF)") +
theme(axis.title = element_text(face = "bold"), text=element_text(size=25)) +
geom_hline(data = clim.Nyrs, aes(yintercept = clim.Nyr.i), col = "black", linetype="dashed", lwd = 0.6) +
scale_x_continuous(breaks=seq(1980, 2010, 5)) +
scale_fill_manual(values = "lightblue") +
#scale_linetype_manual(name = "Climatology", values = 1, guide = guide_legend(override.aes = list(color = c("black")))) +
facet_wrap(~plotLabel, scales = "free", shrink = F, ncol = 1)
print(plot)

ggsave("fcstEnsembles_AllMeanLengths_blind_RFonly_limited.png", plot = print(plot), device = "png", width = 15, height = 10, dpi = 450, units = "in")
plot <- ggplot(subset(fcstReliability, Model != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7)), aes(pred, obs_freq, color = Model)) +
geom_vline(xintercept = 0, linetype = 'dashed', colour = 'grey20', size = 0.5) +
geom_vline(xintercept = 0.2, linetype = 'dashed', colour = 'grey20', size = 0.5) +
geom_vline(xintercept = 0.4, linetype = 'dashed', colour = 'grey20', size = 0.5) +
geom_vline(xintercept = 0.6, linetype = 'dashed', colour = 'grey20', size = 0.5) +
geom_vline(xintercept = 0.8, linetype = 'dashed', colour = 'grey20', size = 0.5) +
geom_vline(xintercept = 1, linetype = 'dashed', colour = 'grey20', size = 0.5) +
geom_point() +
theme_bw() +
xlab('Forecast Probability') +
ylab('Observed Frequency') +
scale_x_continuous(expand = c(0.0,0.0), breaks = seq(0,1,0.2), minor_breaks = seq(0,1,0.1), limits = c(-0.02,1.02))+
scale_y_continuous(expand = c(0.0,0.0), breaks = seq(0,1,0.2), limits = c(-0.02,1.02)) +
theme(panel.grid.minor = element_line(size = 0.02),
panel.grid.major = element_line(color = 'grey65', size = 0.5),
axis.title = element_text(face = "bold")) +
coord_fixed() +
theme(axis.title = element_text(face = "bold"), text=element_text(size=25)) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
labs(color = "Model") +
facet_wrap(~plotLabel, shrink = F, nrow = 1) +
theme(panel.spacing.x = unit(0.4, "in"))
# gg_circle(r=0.02, xc=fcst_prob, yc=obs_freq, color = 'red')
print(plot)

ggsave("fcstReliabilityDiagrams_limited_blind_RFonly.png", plot = print(plot), device = "png", width = 15, height = 7.5, dpi = 450, units = "in")
p1b = ggplot(data = subset(fcstSkillScores, variable == "RPSS" & model != "CESM-DPLE, KNN")) +
geom_line(mapping = aes(x = startYear, y = value, linetype = model)) +
geom_point(mapping = aes(x = startYear, y = value, color = flowTercile)) +
theme_bw() +
#geom_boxplot(aes(fill = covariate)) +
geom_hline(yintercept = 0) + xlab(paste0("Starting year of projected N-year mean flow")) +
theme(axis.title = element_text(face = "bold"), axis.text.x = element_text(), text=element_text(size=25)) +
ylab("RPSS") +
guides(color=guide_legend("Flow Tercile")) +
#scale_x_discrete(breaks=seq(min(as.numeric(as.character(crpss.melt$startYear))), max(as.numeric(as.character(crpss.melt$startYear))), 10))
facet_wrap(~meanLength, scales = "free", shrink = F)
print(p1b)

ggsave("rpss_skillTS_AllMeanLengths_blind_RFonly.png", plot = print(p1b), device = "png", width = 15, height = 10, dpi = 450, units = "in")
p1c = ggplot(data = subset(fcstSkillScores, variable == "RPSS" & model != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7))) +
geom_line(mapping = aes(x = startYear, y = value, linetype = model), size = 1.1) +
geom_point(mapping = aes(x = startYear, y = value, color = flowTercile), size = 3.5) +
theme_bw() +
#geom_boxplot(aes(fill = covariate)) +
geom_hline(yintercept = 0) + xlab(paste0("Starting year of projected N-year mean flow")) +
theme(axis.title = element_text(face = "bold"), axis.text.x = element_text(), text=element_text(size=25)) +
ylab("RPSS") +
scale_x_continuous(breaks=seq(1980, 2015, 5)) +
guides(color=guide_legend("Flow Tercile")) +
#scale_x_discrete(breaks=seq(min(as.numeric(as.character(crpss.melt$startYear))), max(as.numeric(as.character(crpss.melt$startYear))), 10))
facet_wrap(~meanLength, scales = "free", shrink = F, ncol = 1)
print(p1c)

ggsave("rpss_skillTS_AllMeanLengths_blind_RFonly_limited.png", plot = print(p1c), device = "png", width = 15, height = 10, dpi = 450, units = "in")